library(gapminder)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(knitr)
Part 1: Factor management
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
A better structure of the gapminder dataset
gapminder %>%
group_by(continent) %>%
knitr::kable() %>%
head()
## [1] "country continent year lifeExp pop gdpPercap"
## [2] "------------------------- ---------- ----- --------- ----------- ------------"
## [3] "Afghanistan Asia 1952 28.80100 8425333 779.4453"
## [4] "Afghanistan Asia 1957 30.33200 9240934 820.8530"
## [5] "Afghanistan Asia 1962 31.99700 10267083 853.1007"
## [6] "Afghanistan Asia 1967 34.02000 11537966 836.1971"
gapminder %>%
summarize(
n_row = nrow(gapminder),
cont_levels = nlevels(gapminder$country),
coun_levels = nlevels(gapminder$continent)) %>%
knitr::kable(col.names = c("Rows", "Countries", "Continents"))
| Rows | Countries | Continents |
|---|---|---|
| 1704 | 142 | 5 |
gapminder %>%
filter(continent == "Asia") %>%
group_by(country) %>%
select(country, gdpPercap) %>%
knitr::kable(col.names = c("Country", "GDP Percap")) %>%
head(10)
## [1] "Country GDP Percap"
## [2] "------------------- ------------"
## [3] "Afghanistan 779.4453"
## [4] "Afghanistan 820.8530"
## [5] "Afghanistan 853.1007"
## [6] "Afghanistan 836.1971"
## [7] "Afghanistan 739.9811"
## [8] "Afghanistan 786.1134"
## [9] "Afghanistan 978.0114"
## [10] "Afghanistan 852.3959"
Using the “arrange” function
gapminder %>%
filter(continent == "Asia") %>%
select(country, gdpPercap, year) %>%
mutate(max_GDP = max(gdpPercap)) %>%
arrange(max_GDP) %>%
knitr::kable() %>%
head(20)
## [1] "country gdpPercap year max_GDP"
## [2] "------------------- ------------ ----- ---------"
## [3] "Afghanistan 779.4453 1952 113523.1"
## [4] "Afghanistan 820.8530 1957 113523.1"
## [5] "Afghanistan 853.1007 1962 113523.1"
## [6] "Afghanistan 836.1971 1967 113523.1"
## [7] "Afghanistan 739.9811 1972 113523.1"
## [8] "Afghanistan 786.1134 1977 113523.1"
## [9] "Afghanistan 978.0114 1982 113523.1"
## [10] "Afghanistan 852.3959 1987 113523.1"
## [11] "Afghanistan 649.3414 1992 113523.1"
## [12] "Afghanistan 635.3414 1997 113523.1"
## [13] "Afghanistan 726.7341 2002 113523.1"
## [14] "Afghanistan 974.5803 2007 113523.1"
## [15] "Bahrain 9867.0848 1952 113523.1"
## [16] "Bahrain 11635.7995 1957 113523.1"
## [17] "Bahrain 12753.2751 1962 113523.1"
## [18] "Bahrain 14804.6727 1967 113523.1"
## [19] "Bahrain 18268.6584 1972 113523.1"
## [20] "Bahrain 19340.1020 1977 113523.1"
This shows the max gdpPercap by an Asian country in a certain year.
visualisation using ggplot
arranged_gap <- gapminder %>%
filter(continent == "Asia") %>%
select(country, gdpPercap, year) %>%
mutate(max_GDP = max(gdpPercap)) %>%
arrange(max_GDP)
arranged_gap %>%
ggplot(aes(max_GDP, year, colour = country)) +
geom_point()
this is weird. according to this graph, Yemen had the max gdp over the years.
arranged_gap %>%
filter(country == "Bangladesh") %>%
ggplot(aes(max_GDP, year)) +
geom_point()
ok. dont know whats going on with GDP. Let me try with lifeExp.
gapminder %>%
filter(continent == "Asia") %>%
group_by(country) %>%
select(country, lifeExp) %>%
knitr::kable(col.names = c("Country", "Life Expectancy")) %>%
head(10)
## [1] "Country Life Expectancy"
## [2] "------------------- ----------------"
## [3] "Afghanistan 28.80100"
## [4] "Afghanistan 30.33200"
## [5] "Afghanistan 31.99700"
## [6] "Afghanistan 34.02000"
## [7] "Afghanistan 36.08800"
## [8] "Afghanistan 38.43800"
## [9] "Afghanistan 39.85400"
## [10] "Afghanistan 40.82200"
nlevels(gapminder$lifeExp)
## [1] 0
nlevels(gapminder$gdpPercap)
## [1] 0
nlevels(gapminder$pop)
## [1] 0
nlevels(gapminder$country)
## [1] 142
nlevels(gapminder$continent)
## [1] 5
nlevels(gapminder$year)
## [1] 0
arranged_gap <- gapminder %>%
filter(continent == "Asia") %>%
select(country, lifeExp, year) %>%
mutate(max_LEx = max(lifeExp)) %>%
arrange(max_LEx)
gapminder %>%
filter(year > 1987) %>%
filter(continent == "Asia") %>%
select(country, year, lifeExp)
## # A tibble: 132 x 3
## country year lifeExp
## <fct> <int> <dbl>
## 1 Afghanistan 1992 41.7
## 2 Afghanistan 1997 41.8
## 3 Afghanistan 2002 42.1
## 4 Afghanistan 2007 43.8
## 5 Bahrain 1992 72.6
## 6 Bahrain 1997 73.9
## 7 Bahrain 2002 74.8
## 8 Bahrain 2007 75.6
## 9 Bangladesh 1992 56.0
## 10 Bangladesh 1997 59.4
## # ... with 122 more rows
arranged_lifeEx <- gapminder %>%
filter(year > 1987) %>%
filter(continent == "Asia") %>%
select(country, year, lifeExp)
arranged_lifeEx %>%
ggplot(aes(lifeExp, year, colour = country)) +
geom_point()
Dropping Oceania now.
gapminder %>%
filter(continent != "Oceania") %>%
droplevels()
## # A tibble: 1,680 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,670 more rows
levels(gapminder$continent)
## [1] "Africa" "Americas" "Asia" "Europe" "Oceania"
Checking for the continents after dropping Oceania.
noOc_gap <- gapminder %>%
filter(continent != "Oceania") %>%
droplevels()
levels(noOc_gap$continent)
## [1] "Africa" "Americas" "Asia" "Europe"
Plotting for lifeExp after dropping Oceania
noOc_gap %>%
ggplot(aes(continent, lifeExp, colour = continent)) +
geom_jitter() +
labs (x = "Continent",
y = "Life Expectancy",
title = "Life expectancy after dropping Oceania") +
theme_bw()
Part 2: File input and output (File I/O)
arranged_lifeEx <- gapminder %>%
filter(year > 1987) %>%
filter(continent == "Asia") %>%
select(country, year, lifeExp)
Trying to save the file as csv. as instructed in the assigment.
write.csv(arranged_lifeEx, file = "STAT545_hw05")
Trying to import using read.csv as instructed
read_csv("STAT545_hw05")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## country = col_character(),
## year = col_integer(),
## lifeExp = col_double()
## )
## # A tibble: 132 x 4
## X1 country year lifeExp
## <int> <chr> <int> <dbl>
## 1 1 Afghanistan 1992 41.7
## 2 2 Afghanistan 1997 41.8
## 3 3 Afghanistan 2002 42.1
## 4 4 Afghanistan 2007 43.8
## 5 5 Bahrain 1992 72.6
## 6 6 Bahrain 1997 73.9
## 7 7 Bahrain 2002 74.8
## 8 8 Bahrain 2007 75.6
## 9 9 Bangladesh 1992 56.0
## 10 10 Bangladesh 1997 59.4
## # ... with 122 more rows
Part 3: Visualisation design
library(plotly)
noOc_gap %>%
ggplot(aes(lifeExp, year)) +
geom_point(aes(colour = pop)) +
facet_wrap(~ continent) +
theme_bw() +
labs(x= "Life Expectancy", y = "Year") +
theme(axis.text = element_text(size = 8),
axis.title = element_text(size = 20),
strip.background = element_rect(fill = "purple"))
this graph shows that Asia had very high population (light blue) at some points, and its position with respect to Year and Life expectance.
plot_ly(noOc_gap,
x = ~lifeExp,
y = ~year,
z = ~pop,
type = "scatter",
mode = "markers",
opacity = 0.5)
## Warning: 'scatter' objects don't have these attributes: 'z'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'mode', 'hoveron', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
I would like to try the 3D version.
plot_ly(noOc_gap,
x = ~lifeExp,
y = ~year,
z = ~pop,
type = "scatter3d",
mode = "markers",
opacity = 0.5)
# "3D" did not work. try "3d". case sensitive. work eith extending at the edges to enlarge or shrink with trackpad.
Part 4: Writing figures to file
hw05_fig <- noOc_gap %>%
ggplot(aes(lifeExp, year)) +
geom_point(aes(colour = pop)) +
facet_wrap(~ continent) +
theme_bw() +
labs(x= "Life Expectancy", y = "Year") +
theme(axis.text = element_text(size = 8),
axis.title = element_text(size = 20),
strip.background = element_rect(fill = "purple"))
ggsave("hw05_plot.png", hw05_fig)
## Saving 7 x 5 in image
changing the scaling of the figure
ggsave("hw05_plot2.png", hw05_fig, width = 50, height = 40, units = "cm", dpi = 600)
hw05_plot is better looking than hw05_plot2